library(data.table)
data.table 1.15.2 using 80 threads (see ?getDTthreads). Latest news: r-datatable.com
library(readxl)
library(ggplot2)
library(gprofiler2)
library(glmnet)
Loading required package: Matrix
Loaded glmnet 4.1-8
library(ComplexHeatmap)
Loading required package: grid
========================================
ComplexHeatmap version 2.14.0
Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
Github page: https://github.com/jokergoo/ComplexHeatmap
Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
If you use it in published research, please cite either one:
- Gu, Z. Complex Heatmap Visualization. iMeta 2022.
- Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
genomic data. Bioinformatics 2016.
The new InteractiveComplexHeatmap package can directly export static
complex heatmaps into an interactive Shiny app with zero effort. Have a try!
This message can be suppressed by:
suppressPackageStartupMessages(library(ComplexHeatmap))
========================================
library(pROC)
Type 'citation("pROC")' for a citation.
Attaching package: ‘pROC’
The following objects are masked from ‘package:stats’:
cov, smooth, var
library(caret)
Loading required package: lattice
library(mixOmics)
Loading required package: MASS
Loaded mixOmics 6.22.0
Thank you for using mixOmics!
Tutorials: http://mixomics.org
Bookdown vignette: https://mixomicsteam.github.io/Bookdown
Questions, issues: Follow the prompts at http://mixomics.org/contact-us
Cite us: citation('mixOmics')
Attaching package: ‘mixOmics’
The following objects are masked from ‘package:caret’:
nearZeroVar, plsda, splsda
set.seed(101)
# read meta data
meta <- as.data.table(readxl::read_excel("data/transcritome_patients_translated.xlsx"))
meta_key <- "CEL_FILE"
setkeyv(meta, meta_key) # set key
stopifnot(!any( duplicated(meta[,..meta_key]) )) # check for duplicate rows
stopifnot(!any( duplicated(colnames(meta)) )) # check for duplicate columns
# read expression data
expr <- as.data.table(readxl::read_excel("data/transcriptome_expression_matrix.xlsx"))
New names:
expr_key <- "gene"
colnames(expr)[1] <- expr_key
setkeyv(expr, expr_key) # set key
stopifnot(!any( duplicated(expr[,..expr_key]) )) # check for duplicate rows
stopifnot(!any( duplicated(colnames(expr)) )) # check for duplicate columns
expr <- expr[, c(key(expr), meta$CEL_FILE), with=F]
stopifnot(all(colnames(expr)[-1]==meta[,CEL_FILE]))
expr <- as.matrix(expr, rownames="gene")
meta_M0 <- meta[LOCATION=="M0"]
meta_MI <- meta[LOCATION=="MI"]
meta_M6 <- meta[LOCATION=="M6"]
meta_M0M6 <- meta[LOCATION=="M6"|LOCATION=="M0"]
# read mouse signature
signature_mouse <- fread("data/signature.csv",header = F)[[1]]
# read all mouse genes measured with nanostring
measured_genes_mouse <- fread("data/All samples_NormalizedData.csv")[[1]]
# check that all signature genes are part of the measured genes
stopifnot(all(signature_mouse %in% measured_genes_mouse))
# map to human orthologs
ortholog_mapping <- as.data.table(gprofiler2::gorth(measured_genes_mouse, source_organism = "mmusculus", target_organism = "hsapiens", filter_na = F))
stopifnot(all(measured_genes_mouse %in% ortholog_mapping$input))
# mark signature genes and hits in the human expr data
ortholog_mapping$in_expr <- ortholog_mapping$ortholog_name %in% rownames(expr)
ortholog_mapping$in_signature <- ortholog_mapping$input %in% signature_mouse
# count number of hits in the human expr data per mouse gene
n_in_expr <- ortholog_mapping[,.("n_hits" = sum(in_expr)),by=input]
# print number of hits in expr$gene per measured mouse gene
table(n_in_expr$n_hits)
0 1 2 3 6 7
48 708 8 2 1 1
# select only mouse genes with orthologs uniquely mapped to the human expression data
unique_ortholog_mapping <- ortholog_mapping[input %in% n_in_expr[n_hits==1,input] & in_expr]
# unmapped signature genes
print(paste0(nrow(unique_ortholog_mapping)," out of ",length(measured_genes_mouse), " measured mouse genes were mapped to the human expression genes"))
[1] "708 out of 768 measured mouse genes were mapped to the human expression genes"
ortholog_mapping[!input %in% unique_ortholog_mapping$input & in_signature]
plot_data <- data.table(sample=colnames(expr))
plot_data[,colSum:=colSums(expr)]
plot_data[,RECURRENCE:=meta$RECURRENCE]
plot_data[,LOCATION:=meta$LOCATION]
plot_data[,GENDER:=meta$GENDER]
ggplot(plot_data,aes(x=colSum,fill=RECURRENCE)) +
geom_density(alpha=0.5)
ggplot(plot_data,aes(x=colSum,fill=LOCATION)) +
geom_density(alpha=0.5)
ggplot(plot_data,aes(x=colSum,fill=GENDER)) +
geom_density(alpha=0.5)
table(meta$RECURRENCE, meta$LOCATION)
Ctrl M0 M6 MI
Ctrl 25 0 0 0
NR 0 57 36 43
R 0 139 85 104
column_ha = HeatmapAnnotation(GENDER = meta$GENDER, LOCATION = meta$LOCATION, RECURRENCE=meta$RECURRENCE)
Heatmap(t(apply( expr[unique_ortholog_mapping[in_expr==T&in_signature==T,ortholog_name],meta$CEL_FILE] ,1,scale)),
show_column_names = F,
top_annotation = column_ha,
name = "Expression",
)
column_ha = HeatmapAnnotation(GENDER = meta_M0$GENDER, LOCATION = meta_M0$LOCATION, RECURRENCE=meta_M0$RECURRENCE)
Heatmap(t(apply( expr[unique_ortholog_mapping[in_expr==T&in_signature==T,ortholog_name],meta_M0$CEL_FILE] ,1,scale)),
show_column_names = F,
top_annotation = column_ha,
name = "Expression",
)
column_ha = HeatmapAnnotation(GENDER = meta_M6$GENDER, LOCATION = meta_M6$LOCATION, RECURRENCE=meta_M6$RECURRENCE)
Heatmap(t(apply( expr[unique_ortholog_mapping[in_expr==T&in_signature==T,ortholog_name],meta_M6$CEL_FILE] ,1,scale)),
show_column_names = F,
top_annotation = column_ha,
name = "Expression",
)
column_ha = HeatmapAnnotation(GENDER = meta_MI$GENDER, LOCATION = meta_MI$LOCATION, RECURRENCE=meta_MI$RECURRENCE)
Heatmap(t(apply( expr[unique_ortholog_mapping[in_expr==T&in_signature==T,ortholog_name],meta_MI$CEL_FILE] ,1,scale)),
show_column_names = F,
top_annotation = column_ha,
name = "Expression",
)
column_ha = HeatmapAnnotation(GENDER = meta_M0M6$GENDER, LOCATION = meta_M0M6$LOCATION, RECURRENCE=meta_M0M6$RECURRENCE)
Heatmap(t(apply( expr[unique_ortholog_mapping[in_expr==T&in_signature==T,ortholog_name],meta_M0M6$CEL_FILE] ,1,scale)),
show_column_names = F,
top_annotation = column_ha,
name = "Expression",
)
column_ha = HeatmapAnnotation(Recurrence=ifelse(meta_M0$RECURRENCE=="R", "Recurrent", "Not recurrent"),
col = list(Recurrence= c("Recurrent" = "orange", "Not recurrent" = "blue"))
)
Heatmap(t(apply( expr[unique_ortholog_mapping[in_expr==T&in_signature==T,ortholog_name],meta_M0$CEL_FILE] ,1,scale)),
show_column_names = F,
top_annotation = column_ha,
name = "Expression",
row_names_gp = gpar(fontsize = 10),
)
meta_M0_separated <- meta_M0[order(RECURRENCE)]
column_ha = HeatmapAnnotation(Recurrence=ifelse(meta_M0_separated$RECURRENCE=="R", "Recurrent", "Not recurrent"),
col = list(Recurrence= c("Recurrent" = "orange", "Not recurrent" = "blue"))
)
Heatmap(t(apply( expr[unique_ortholog_mapping[in_expr==T&in_signature==T,ortholog_name],meta_M0_separated$CEL_FILE] ,1,scale)),
show_column_names = F,
top_annotation = column_ha,
cluster_columns = F,
name = "Expression",
row_names_gp = gpar(fontsize = 10),
)
column_ha = HeatmapAnnotation(Location=meta_M0M6$LOCATION,
col = list(Location= c("M6" = "orange", "M0" = "blue"))
)
Heatmap(t(apply( expr[unique_ortholog_mapping[in_expr==T&in_signature==T,ortholog_name],meta_M0M6$CEL_FILE] ,1,scale)),
show_column_names = F,
top_annotation = column_ha,
name = "Expression",
row_names_gp = gpar(fontsize = 10),
)
meta_M0M6_separated <- meta_M0M6[order(LOCATION)]
column_ha = HeatmapAnnotation(Location=meta_M0M6_separated$LOCATION,
col = list(Location= c("M6" = "orange", "M0" = "blue"))
)
Heatmap(t(apply( expr[unique_ortholog_mapping[in_expr==T&in_signature==T,ortholog_name],meta_M0M6_separated$CEL_FILE] ,1,scale)),
show_column_names = F,
top_annotation = column_ha,
cluster_columns = F,
name = "Expression",
row_names_gp = gpar(fontsize = 10),
)
n_genes <- length(unique_ortholog_mapping[in_expr==T&in_signature==T,ortholog_name])
high_var_genes <- sort(apply(expr, 1, sd), decreasing = T)[1:n_genes]
column_ha = HeatmapAnnotation(GENDER = meta$GENDER, LOCATION = meta$LOCATION, RECURRENCE=meta$RECURRENCE)
Heatmap(t(apply( expr[names(high_var_genes),meta$CEL_FILE] ,1,scale)),
show_column_names = F,
top_annotation = column_ha,
name = "Expression",
)
column_ha = HeatmapAnnotation(GENDER = meta_M0$GENDER, LOCATION = meta_M0$LOCATION, RECURRENCE=meta_M0$RECURRENCE)
Heatmap(t(apply( expr[names(high_var_genes),meta_M0$CEL_FILE] ,1,scale)),
show_column_names = F,
top_annotation = column_ha,
name = "Expression",
)
column_ha = HeatmapAnnotation(GENDER = meta_M6$GENDER, LOCATION = meta_M6$LOCATION, RECURRENCE=meta_M6$RECURRENCE)
Heatmap(t(apply( expr[names(high_var_genes),meta_M6$CEL_FILE] ,1,scale)),
show_column_names = F,
top_annotation = column_ha,
name = "Expression",
)
column_ha = HeatmapAnnotation(GENDER = meta_MI$GENDER, LOCATION = meta_MI$LOCATION, RECURRENCE=meta_MI$RECURRENCE)
Heatmap(t(apply( expr[names(high_var_genes),meta_MI$CEL_FILE] ,1,scale)),
show_column_names = F,
top_annotation = column_ha,
name = "Expression",
)
column_ha = HeatmapAnnotation(GENDER = meta_M0M6$GENDER, LOCATION = meta_M0M6$LOCATION, RECURRENCE=meta_M0M6$RECURRENCE)
Heatmap(t(apply( expr[names(high_var_genes),meta_M0M6$CEL_FILE] ,1,scale)),
show_column_names = F,
top_annotation = column_ha,
name = "Expression",
)
high_var_genes <- sort(apply(expr[,meta_M6$CEL_FILE], 1, sd), decreasing = T)[1:5000]
pls_da_expr <- t(expr[names(high_var_genes),meta_M0$CEL_FILE])
#pls_da_expr <- 2 ^ pls_da_expr
pca.expr <- pca(pls_da_expr, ncomp = 3, scale = TRUE)
plotIndiv(pca.expr, group = meta_M0$RECURRENCE, ind.names = FALSE,
legend = TRUE,
title = 'PCA comp 1 - 2')
plsda.expr <- plsda(pls_da_expr, meta_M0$RECURRENCE, ncomp = 10)
perf.plsda.expr <- perf(plsda.expr, validation = 'Mfold', folds = 3,
progressBar = FALSE,
nrepeat = 10)
plot(perf.plsda.expr, sd = TRUE, legend.position = 'horizontal')
logistic_glmnet <- function(meta, expr, signature){
x <- t(expr[signature, meta$CEL_FILE])
y <- meta$RECURRENCE
fit <- glmnet(x, y, family = "binomial")
plot(fit, label = T)
cvfit <- cv.glmnet(x, y, family = "binomial", type.measure = "class")
plot(cvfit)
print(cvfit)
}
logistic_glm <- function(meta, expr, signature, roc=F, summary=F, performance=F){
x <- t(expr[signature, meta$CEL_FILE])
y <- meta$RECURRENCE
data <- as.data.frame(x)
data$RECURRENCE <- 0
data$RECURRENCE[y=="R"] <- 1
glm_model <- glm(RECURRENCE ~.,family = "binomial", data)
# prediction
model_prob = predict(glm_model, type = "response")
model_pred = ifelse(model_prob > 0.5, "R", "NR")
train_tab = table(predicted = model_pred, actual = y)
train_con_mat = confusionMatrix(train_tab)
if(summary){
print(summary(glm_model))
}
if(roc){
roc(y ~ model_prob, plot = TRUE, print.auc = TRUE)
}
if(performance){
print(train_con_mat)
}
train_con_mat$overall["Accuracy"]
}
signature_human <- unique_ortholog_mapping[in_signature==T,ortholog_name]
Feature selection with logistic glmnet
logistic_glmnet(meta_M0, expr, signature_human)
Call: cv.glmnet(x = x, y = y, type.measure = "class", family = "binomial")
Measure: Misclassification Error
Lambda Index Measure SE Nonzero
min 0.06086 1 0.2908 0.02503 0
1se 0.06086 1 0.2908 0.02503 0
Logistic regression with full signature
# Analysis with glm
accuracy <- logistic_glm(meta_M0, expr, signature_human, T, T, T)
Call:
glm(formula = RECURRENCE ~ ., family = "binomial", data = data)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.6967 -0.8290 0.5509 0.8016 1.8942
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -15.06772 15.79400 -0.954 0.3401
ALDOB -0.92023 0.41840 -2.199 0.0278 *
APOA1 0.10998 0.27791 0.396 0.6923
APOB 0.52203 0.30590 1.707 0.0879 .
APOC3 -0.27377 0.29954 -0.914 0.3607
ARG1 0.14516 1.07260 0.135 0.8923
ASNS -0.42582 0.53167 -0.801 0.4232
CACNA1A 0.16560 0.62152 0.266 0.7899
CD274 0.07812 0.37852 0.206 0.8365
CPS1 0.21568 0.37820 0.570 0.5685
CREB3L3 0.05876 0.34063 0.173 0.8630
CXCL9 0.25207 0.28752 0.877 0.3806
DMGDH -1.29012 0.89202 -1.446 0.1481
DUOX2 -0.05173 0.18021 -0.287 0.7741
FAHD1 0.41210 0.68509 0.602 0.5475
ICOS -0.50734 0.48449 -1.047 0.2950
IDO1 -0.24146 0.40907 -0.590 0.5550
INMT 0.74965 0.38851 1.930 0.0537 .
KYAT3 1.29103 0.80823 1.597 0.1102
NOS2 -0.06091 0.35451 -0.172 0.8636
NOX1 0.52374 0.55059 0.951 0.3415
OTC -0.03520 0.42550 -0.083 0.9341
PCK1 0.16418 0.17516 0.937 0.3486
PDK4 -0.13744 0.24200 -0.568 0.5701
PHGDH -0.16188 0.64987 -0.249 0.8033
PSAT1 -0.19922 0.44249 -0.450 0.6526
PTK6 -0.55283 0.65167 -0.848 0.3963
RIMKLA 0.94240 0.71040 1.327 0.1847
SLC7A11 0.42755 0.28598 1.495 0.1349
SPIB 0.20429 0.31458 0.649 0.5161
STAT1 0.80068 0.72341 1.107 train_con_mat
TLR4 -0.34050 0.39892 -0.854 0.3934
TNF -0.34636 0.39681 -0.873 0.3827
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 236.33 on 195 degrees of freedom
Residual deviance: 200.82 on 163 degrees of freedom
AIC: 266.82
Number of Fisher Scoring iterations: 5
Setting levels: control = NR, case = R
Setting direction: controls < cases
Confusion Matrix and Statistics
actual
predicted NR R
NR 19 9
R 38 130
Accuracy : 0.7602
95% CI : (0.6942, 0.8182)
No Information Rate : 0.7092
P-Value [Acc > NIR] : 0.06559
Kappa : 0.316
Mcnemar's Test P-Value : 4.423e-05
Sensitivity : 0.33333
Specificity : 0.93525
Pos Pred Value : 0.67857
Neg Pred Value : 0.77381
Prevalence : 0.29082
Detection Rate : 0.09694
Detection Prevalence : 0.14286
Balanced Accuracy : 0.63429
'Positive' Class : NR
Logistic regression with random signatures
random_signatures <- lapply(1:1000,function(i){
sample(unique_ortholog_mapping$ortholog_name,length(signature_human))
})
random_accuracy <- sapply(random_signatures, function(random_signature){
logistic_glm(meta_M0, expr, random_signature)
})
ggplot(data.frame(random_accuracy=random_accuracy), aes(x=random_accuracy)) +
geom_histogram(bins=30) +
geom_vline(xintercept=accuracy, color="red")
Feature selection with logistic glmnet
logistic_glmnet(meta_M6, expr, signature_human)
Call: cv.glmnet(x = x, y = y, type.measure = "class", family = "binomial")
Measure: Misclassification Error
Lambda Index Measure SE Nonzero
min 0.136 1 0.2975 0.05474 0
1se 0.136 1 0.2975 0.05474 0
Logistic regression with full signature
# Analysis with glm
accuracy <- logistic_glm(meta_M6, expr, signature_human, T, T, T)
Call:
glm(formula = RECURRENCE ~ ., family = "binomial", data = data)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.3392 -0.5410 0.2409 0.6472 1.6068
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -11.93445 27.08750 -0.441 0.65951
ALDOB 0.95027 1.21594 0.782 0.43450
APOA1 -1.62620 1.12759 -1.442 0.14925
APOB 0.95201 1.37919 0.690 0.49003
APOC3 0.27513 0.96583 0.285 0.77575
ARG1 -0.62188 2.15069 -0.289 0.77246
ASNS -1.23129 0.95416 -1.290 0.19689
CACNA1A -2.16046 1.20953 -1.786 0.07407 .
CD274 0.99056 0.86740 1.142 0.25346
CPS1 -0.13152 1.07094 -0.123 0.90226
CREB3L3 0.47578 0.61427 0.775 0.43861
CXCL9 0.18221 0.49859 0.365 0.71477
DMGDH -1.96184 2.60050 -0.754 0.45060
DUOX2 0.28120 0.28756 0.978 0.32814
FAHD1 3.23421 1.45507 2.223 0.02623 *
ICOS 0.47828 0.98646 0.485 0.62779
IDO1 -0.03772 0.79989 -0.047 0.96239
INMT -0.56813 1.15972 -0.490 0.62421
KYAT3 0.55285 1.49817 0.369 0.71212
NOS2 -1.08783 0.58724 -1.852 0.06396 .
NOX1 0.50637 0.96109 0.527 0.59828
OTC -1.05608 1.05991 -0.996 0.31906
PCK1 0.42407 0.54764 0.774 0.43872
PDK4 -1.22094 0.63454 -1.924 0.05434 .
PHGDH 5.37410 1.49335 3.599 0.00032 ***
PSAT1 -0.10896 0.70550 -0.154 0.87726
PTK6 -2.29695 1.14187 -2.012 0.04426 *
RIMKLA -0.63140 1.50560 -0.419 0.67495
SLC7A11 -0.21761 0.75281 -0.289 0.77253
SPIB -0.68279 0.81850 -0.834 0.40417
STAT1 -0.35248 0.99028 -0.356 0.72189
TLR4 -0.15202 0.82122 -0.185 0.85314
TNF 2.17205 1.21091 1.794 0.07286 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 147.317 on 120 degrees of freedom
Residual deviance: 94.893 on 88 degrees of freedom
AIC: 160.89
Number of Fisher Scoring iterations: 6
Setting levels: control = NR, case = R
Setting direction: controls < cases
Confusion Matrix and Statistics
actual
predicted NR R
NR 23 10
R 13 75
Accuracy : 0.8099
95% CI : (0.7286, 0.8755)
No Information Rate : 0.7025
P-Value [Acc > NIR] : 0.005027
Kappa : 0.5341
Mcnemar's Test P-Value : 0.676657
Sensitivity : 0.6389
Specificity : 0.8824
Pos Pred Value : 0.6970
Neg Pred Value : 0.8523
Prevalence : 0.2975
Detection Rate : 0.1901
Detection Prevalence : 0.2727
Balanced Accuracy : 0.7606
'Positive' Class : NR
Logistic regression with random signatures
random_signatures <- lapply(1:1000,function(i){
sample(unique_ortholog_mapping$ortholog_name,length(signature_human))
})
random_accuracy <- sapply(random_signatures, function(random_signature){
logistic_glm(meta_M6, expr, random_signature)
})
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
ggplot(data.frame(random_accuracy=random_accuracy), aes(x=random_accuracy)) +
geom_histogram(bins=30) +
geom_vline(xintercept=accuracy, color="red")
Feature selection with logistic glmnet
logistic_glmnet(meta_MI, expr, signature_human)
Call: cv.glmnet(x = x, y = y, type.measure = "class", family = "binomial")
Measure: Misclassification Error
Lambda Index Measure SE Nonzero
min 0.05889 1 0.2925 0.02646 0
1se 0.05889 1 0.2925 0.02646 0
Logistic regression with full signature
# Analysis with glm
accuracy <- logistic_glm(meta_MI, expr, signature_human, T, T, T)
Call:
glm(formula = RECURRENCE ~ ., family = "binomial", data = data)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.3934 -0.7820 0.4690 0.7313 2.2220
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -4.17530 24.79441 -0.168 0.8663
ALDOB 2.26021 1.58793 1.423 0.1546
APOA1 1.00416 0.53227 1.887 0.0592 .
APOB -2.01022 1.00052 -2.009 0.0445 *
APOC3 -0.30466 0.52124 -0.584 0.5589
ARG1 -1.85550 1.28265 -1.447 0.1480
ASNS 0.22660 0.75871 0.299 0.7652
CACNA1A 0.01282 0.77252 0.017 0.9868
CD274 0.56382 0.57507 0.980 0.3269
CPS1 0.53332 0.65796 0.811 0.4176
CREB3L3 -0.54022 0.47594 -1.135 0.2564
CXCL9 -0.30102 0.37615 -0.800 0.4236
DMGDH 0.41788 1.88106 0.222 0.8242
DUOX2 0.10864 0.19018 0.571 0.5678
FAHD1 0.36869 0.80406 0.459 0.6466
ICOS -1.39655 0.63234 -2.209 0.0272 *
IDO1 0.78642 0.52369 1.502 0.1332
INMT 0.43336 0.54341 0.797 0.4252
KYAT3 0.99032 1.16555 0.850 0.3955
NOS2 0.52479 0.46142 1.137 0.2554
NOX1 0.27130 0.85806 0.316 0.7519
OTC -0.83141 0.74064 -1.123 0.2616
PCK1 -0.21972 0.48399 -0.454 0.6498
PDK4 -0.11671 0.32671 -0.357 0.7209
PHGDH -0.37393 1.09492 -0.342 0.7327
PSAT1 -0.36391 0.58402 -0.623 0.5332
PTK6 0.16596 0.76056 0.218 0.8273
RIMKLA -0.51355 1.12269 -0.457 0.6474
SLC7A11 -0.46536 0.44119 -1.055 0.2915
SPIB -0.37485 0.48838 -0.768 0.4428
STAT1 -0.87887 0.81733 -1.075 0.2822
TLR4 0.45989 0.64286 0.715 0.4744
TNF 0.72373 0.98126 0.738 0.4608
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 177.69 on 146 degrees of freedom
Residual deviance: 142.78 on 114 degrees of freedom
AIC: 208.78
Number of Fisher Scoring iterations: 5
Setting levels: control = NR, case = R
Setting direction: controls < cases
Confusion Matrix and Statistics
actual
predicted NR R
NR 17 9
R 26 95
Accuracy : 0.7619
95% CI : (0.6847, 0.8282)
No Information Rate : 0.7075
P-Value [Acc > NIR] : 0.085037
Kappa : 0.3493
Mcnemar's Test P-Value : 0.006841
Sensitivity : 0.3953
Specificity : 0.9135
Pos Pred Value : 0.6538
Neg Pred Value : 0.7851
Prevalence : 0.2925
Detection Rate : 0.1156
Detection Prevalence : 0.1769
Balanced Accuracy : 0.6544
'Positive' Class : NR
Logistic regression with random signatures
random_signatures <- lapply(1:1000,function(i){
sample(unique_ortholog_mapping$ortholog_name,length(signature_human))
})
random_accuracy <- sapply(random_signatures, function(random_signature){
logistic_glm(meta_MI, expr, random_signature)
})
ggplot(data.frame(random_accuracy=random_accuracy), aes(x=random_accuracy)) +
geom_histogram(bins=30) +
geom_vline(xintercept=accuracy, color="red")